required libraries
source("UnsupervisedAnalysis_Functions.R")
library(ggplot2)
library(circlize)
library(cowplot)
library(pracma)
library(circlize)
set.seed(123)
We load the data for the unsupervised analysis object that we produced using kmeans clustering
US <- readRDS("../data/USData.rds")
we add the B-Soid clustering data. we cut the first 300 frames (as we did with kmeans), since these are sometimes accompanied with noise from delayed animal placement / lighting changes etc. we create a USDataCheck report to inspect the complexity of the data and a USDataCheck to ensure that all labels have exactly the same number of frames for each file.
US <- AddFromBsoid(US,"../BSOID/Clustering1Data/",cut_start = 300,lab_name = "BSOID.8")
US <- AddFromBsoid(US,"../BSOID/Clustering2Data/",cut_start = 300,lab_name = "BSOID.27")
head(USDataReport(US))
USDataCheck(US)
we first smooth the data over 5 frames, this removes clusters that were identified in a single frame and ensures that beahvior trains are better resolved
US <- SmoothLabels_US(US, 5)
We calculate metrics, such as number of onset-offset and number of frames for each cluster.
US <- CalculateMetrics(US)
Next, we calculate the trainsitionsmatrix for each clustering analysis in each file
US <- AddTransitionMatrixData(US)
We finally calculate the confusion matrix between each clustering result across all files. this will allow us to compare different algorithms and see if they result in clusters that map strongly to each other
US <- AddConfusionMatrix(US)
We save this object, so next time we do not have to re-process it
saveRDS(US,"../data/USData.rds")
In this section we will investigate how well clusters from different kmeans algorithms correlate to each other and to rearing beahavoir in mice
PlotConfusionMatrix(US$ConfusionMatrix_norm$`BSOID-rear.classifier`,pointsize = 4, hclust = TRUE)
PlotConfusionMatrix(US$ConfusionMatrix_norm$`BSOID.27-rear.classifier`,pointsize = 4, hclust = TRUE)
PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.10-rear.classifier`,pointsize = 4, hclust = TRUE)
PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.25-rear.classifier`,pointsize = 4, hclust = TRUE)
PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.50-rear.classifier`,pointsize = 4, hclust = TRUE)
PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.100-rear.classifier`,pointsize = 4, hclust = TRUE)
we notice that with increase clusters numbers more specifically map to
annotated beahviors in general
PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.svd.10-rear.classifier`,pointsize = 4, hclust = TRUE)
PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.svd.25-rear.classifier`,pointsize = 4, hclust = TRUE)
PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.svd.50-rear.classifier`,pointsize = 4, hclust = TRUE)
PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.svd.100-rear.classifier`,pointsize = 4, hclust = TRUE)
using svd prior to clustering does not alter the mapping to annotated behaviors strongly
PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.50-kmeans.svd.50`,pointsize = 4, hclust = TRUE)
kmeans and svd + kmeans finds quite different solutions and poor overlap
for most clusters
PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.100-kmeans.50`,pointsize = 4, hclust = TRUE)
As we increase the number of clusters, some are retained, and some are further subdivided into multiple sub-clusters. mapping between kmeans 50 and kmeans 100 is much better than between kmeans 50 and kmeans + svd 50
PlotBehaviorFlow(US,lab = "kmeans.25")
PlotBehaviorFlow(US,lab = "kmeans.svd.25")
PlotBehaviorFlow(US,lab = "BSOID.27")
Overall the transitions for the kmeans + svd cluster are not as well defined as the kmeans without svd
US_CSI <-SplitUSData(US, select = (US$meta$Session == "PostCSI"))
US_CSI$Results <- NULL
PlotBehaviorFlow_Delta(US_CSI, US_CSI$meta$Condition == "CSI", lab = "kmeans.25")
PlotBehaviorFlow_Delta(US_CSI, US_CSI$meta$Condition == "CSI", lab = "kmeans.25", method = "up")
PlotBehaviorFlow_Delta(US_CSI, US_CSI$meta$Condition == "CSI", lab = "kmeans.25", method = "down")
US_CSI <- TwoGroupAnalysis(US_CSI, US_CSI$meta$Condition)
## [1] "no label specified, performing analysis for all"
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
#classical readouts
head(US_CSI$Results[[1]]$Statistics$raw)
## name p FDR
## 16 bodycentre.center.distance.moving 5.386446e-07 4.883687e-05
## 23 bodycentre.center.transitions 5.536424e-07 4.883687e-05
## 15 bodycentre.center.raw.distance 1.436374e-06 7.322318e-05
## 19 bodycentre.center.time.moving 1.660199e-06 7.322318e-05
## 35 bodycentre.corners.raw.speed 2.126009e-05 7.080668e-04
## 40 bodycentre.corners.percentage.moving 2.858221e-05 7.080668e-04
#clusters stats
head(US_CSI$Results[[1]]$Statistics$kmeans.25)
## name p FDR
## 40 X14.count 1.301842e-06 0.000210856
## 18 X21.count 1.874607e-06 0.000210856
## 17 X21.nframes 2.079875e-05 0.001559631
## 44 X3.count 3.091483e-05 0.001738652
## 46 X25.count 8.459415e-05 0.003806065
## 43 X3.nframes 1.025069e-03 0.038433304
#clusters Transition Stats
head(US_CSI$Results[[1]]$TransitionStats$kmeans.25$transitions)
## from to p.value CSI Control FC FDR
## 45 16 14 6.123591e-06 10.466667 6.206897 1.686296 0.01576857
## 497 14 3 1.549739e-05 19.733333 13.344828 1.478725 0.01995330
## 534 3 21 1.697303e-04 13.400000 9.344828 1.433948 0.14568817
## 548 3 25 3.802951e-04 8.033333 4.862069 1.652246 0.21421873
## 570 25 14 4.315642e-04 5.333333 3.206897 1.663082 0.21421873
## 20 7 14 5.105470e-04 8.666667 5.896552 1.469786 0.21421873
US$Results <- US_CSI$Results
PlotTransitionsStats(US_CSI)
US_FST45 <-SplitUSData(US, select = (US$meta$Session == "FST45min"))
US_FST45$Results <- NULL
PlotBehaviorFlow_Delta(US_FST45, US_FST45$meta$Treatment == "Swim", lab = "kmeans.25")
PlotBehaviorFlow_Delta(US_FST45, US_FST45$meta$Treatment == "Swim", lab = "kmeans.25", method = "up")
PlotBehaviorFlow_Delta(US_FST45, US_FST45$meta$Treatment == "Swim", lab = "kmeans.25", method = "down")
US_FST45 <- TwoGroupAnalysis(US_FST45, US_FST45$meta$Treatment)
## [1] "no label specified, performing analysis for all"
#classical readouts
head(US_FST45$Results[[1]]$Statistics$raw)
## name p FDR
## 10 bodycentre.speed.moving 0.0001262400 0.00721099
## 26 bodycentre.periphery.raw.speed 0.0001505386 0.00721099
## 7 bodycentre.raw.distance 0.0001867722 0.00721099
## 9 bodycentre.raw.speed 0.0001918230 0.00721099
## 8 bodycentre.distance.moving 0.0002125056 0.00721099
## 27 bodycentre.periphery.speed.moving 0.0002452436 0.00721099
#clusters stats
head(US_FST45$Results[[1]]$Statistics$kmeans.25)
## name p FDR
## 24 X1.count 0.01440250 1
## 23 X1.nframes 0.01882770 1
## 2 X7.count 0.02277431 1
## 27 X11.nframes 0.02443449 1
## 42 X9.count 0.02742777 1
## 28 X11.count 0.03081429 1
#clusters Transition Stats
head(US_FST45$Results[[1]]$TransitionStats$kmeans.25$transitions)
## from to p.value Swim Control FC FDR
## 307 12 24 0.002928148 0.8000000 0.0000000 Inf 1
## 289 1 11 0.010457751 30.0666667 42.8666667 0.7013997 1
## 163 24 12 0.010467054 0.8000000 0.1333333 6.0000000 1
## 458 5 4 0.011236314 0.2000000 1.2666667 0.1578947 1
## 4 7 20 0.012817522 0.2666667 1.0666667 0.2500000 1
## 407 15 24 0.013410074 0.8000000 0.2000000 4.0000000 1
US$Results <- append(US$Results, US_FST45$Results)
PlotTransitionsStats(US_FST45)
US_FST24 <-SplitUSData(US, select = (US$meta$Session == "FST24h"))
US_FST24$Results <- NULL
PlotBehaviorFlow_Delta(US_FST24, US_FST24$meta$Treatment == "Swim", lab = "kmeans.25")
PlotBehaviorFlow_Delta(US_FST24, US_FST24$meta$Treatment == "Swim", lab = "kmeans.25", method = "up")
PlotBehaviorFlow_Delta(US_FST24, US_FST24$meta$Treatment == "Swim", lab = "kmeans.25", method = "down")
US_FST24 <- TwoGroupAnalysis(US_FST24, US_FST24$meta$Treatment)
## [1] "no label specified, performing analysis for all"
#classical readouts
head(US_FST24$Results[[1]]$Statistics$raw)
## name p FDR
## 27 bodycentre.periphery.speed.moving 0.02327161 1
## 10 bodycentre.speed.moving 0.02915563 1
## 12 bodycentre.total.time 0.07172900 1
## 34 bodycentre.corners.distance.moving 0.08488321 1
## 25 bodycentre.periphery.distance.moving 0.08596782 1
## 24 bodycentre.periphery.raw.distance 0.08880880 1
#clusters stats
head(US_FST24$Results[[1]]$Statistics$kmeans.25)
## name p FDR
## 30 X22.count 0.00224739 0.5055735
## 14 X24.count 0.01139820 1.0000000
## 31 X17.nframes 0.01935312 1.0000000
## 32 X17.count 0.02700361 1.0000000
## 13 X24.nframes 0.03021485 1.0000000
## 36 X2.count 0.03056139 1.0000000
#clusters Transition Stats
head(US_FST24$Results[[1]]$TransitionStats$kmeans.25$transitions)
## from to p.value Swim Control FC FDR
## 161 24 13 0.001821792 0.93333333 2.33333333 0.40000000 1
## 377 17 16 0.002583943 0.66666667 0.06666667 10.00000000 1
## 70 10 14 0.005341900 0.06666667 1.13333333 0.05882353 1
## 110 8 6 0.008736777 8.53333333 11.53333333 0.73988439 1
## 390 17 22 0.008791038 1.06666667 2.40000000 0.44444444 1
## 307 12 24 0.013517382 0.46666667 0.00000000 Inf 1
US$Results <- append(US$Results, US_FST24$Results)
PlotTransitionsStats(US_FST24)
US_Tun <-SplitUSData(US, select = (US$meta$Session == "PreYoh"))
US_Tun$Results <- NULL
PlotBehaviorFlow_Delta(US_Tun, US_Tun$meta$Condition == "Tunnel", lab = "kmeans.25")
PlotBehaviorFlow_Delta(US_Tun, US_Tun$meta$Condition == "Tunnel", lab = "kmeans.25",method = "up")
PlotBehaviorFlow_Delta(US_Tun, US_Tun$meta$Condition == "Tunnel", lab = "kmeans.25", method = "down")
US_Tun <- TwoGroupAnalysis(US_Tun, US_Tun$meta$Condition)
## [1] "no label specified, performing analysis for all"
#classical readouts
head(US_Tun$Results[[1]]$Statistics$raw)
## name p FDR
## 26 bodycentre.periphery.raw.speed 3.004594e-05 0.002390231
## 8 bodycentre.distance.moving 4.077278e-05 0.002390231
## 7 bodycentre.raw.distance 5.419182e-05 0.002390231
## 9 bodycentre.raw.speed 5.578236e-05 0.002390231
## 35 bodycentre.corners.raw.speed 7.240348e-05 0.002390231
## 27 bodycentre.periphery.speed.moving 8.129103e-05 0.002390231
#clusters stats
head(US_Tun$Results[[1]]$Statistics$kmeans.25)
## name p FDR
## 1 X7.nframes 0.002877083 0.2919733
## 44 X3.count 0.003760699 0.2919733
## 43 X3.nframes 0.003893665 0.2919733
## 2 X7.count 0.005646607 0.3175655
## 28 X11.count 0.007882055 0.3546299
## 24 X1.count 0.013144514 0.4679903
#clusters Transition Stats
head(US_Tun$Results[[1]]$TransitionStats$kmeans.25$transitions)
## from to p.value Tunnel Control FC FDR
## 326 11 7 8.264637e-05 36.65 25.25 1.4514851 0.2234578
## 289 1 11 2.104314e-03 41.10 31.45 1.3068362 1.0000000
## 109 8 21 5.168102e-03 0.80 1.80 0.4444444 1.0000000
## 549 3 23 5.218830e-03 8.30 4.30 1.9302326 1.0000000
## 497 14 3 5.478437e-03 21.10 16.80 1.2559524 1.0000000
## 262 13 1 5.493013e-03 6.20 3.85 1.6103896 1.0000000
US$Results <- append(US$Results, US_Tun$Results)
PlotTransitionsStats(US_Tun)
US_SA <-SplitUSData(US, select = (US$meta$Session == "PostSA"))
US_SA$Results <- NULL
PlotBehaviorFlow_Delta(US_SA, US_SA$meta$Treatment == "SA", lab = "kmeans.25")
PlotBehaviorFlow_Delta(US_SA, US_SA$meta$Treatment == "SA", lab = "kmeans.25", method = "up")
PlotBehaviorFlow_Delta(US_SA, US_SA$meta$Treatment == "SA", lab = "kmeans.25", method = "down")
US_SA <- TwoGroupAnalysis(US_SA, US_SA$meta$Treatment)
## [1] "no label specified, performing analysis for all"
#classical readouts
head(US_SA$Results[[1]]$Statistics$raw)
## name p FDR
## 31 bodycentre.periphery.percentage.moving 8.544603e-05 0.006290669
## 40 bodycentre.corners.percentage.moving 9.503969e-05 0.006290669
## 35 bodycentre.corners.raw.speed 1.069719e-04 0.006290669
## 26 bodycentre.periphery.raw.speed 2.851125e-04 0.007474227
## 13 bodycentre.time.stationary 2.933267e-04 0.007474227
## 14 bodycentre.percentage.moving 2.957890e-04 0.007474227
#clusters stats
head(US_SA$Results[[1]]$Statistics$kmeans.25)
## name p FDR
## 40 X14.count 0.001466045 0.3298018
## 46 X25.count 0.003328359 0.3743743
## 28 X11.count 0.007608333 0.4331980
## 41 X9.nframes 0.007702657 0.4331980
## 44 X3.count 0.012313689 0.5026469
## 42 X9.count 0.013406284 0.5026469
#clusters Transition Stats
head(US_SA$Results[[1]]$TransitionStats$kmeans.25$transitions)
## from to p.value SA Control FC FDR
## 483 14 4 0.0004480593 1.2333333 2.5517241 0.4833333 1
## 497 14 3 0.0031154643 8.6333333 11.7241379 0.7363725 1
## 557 25 24 0.0036088107 0.1333333 0.6206897 0.2148148 1
## 420 15 14 0.0044014861 3.0333333 4.8275862 0.6283333 1
## 411 15 13 0.0047287440 0.7666667 0.2413793 3.1761905 1
## 86 20 13 0.0055192157 4.3333333 2.8965517 1.4960317 1
US$Results <- append(US$Results, US_SA$Results)
PlotTransitionsStats(US_SA)
US_Yoh <-SplitUSData(US, select = (US$meta$Session == "PostYoh"))
US_Yoh$Results <- NULL
PlotBehaviorFlow_Delta(US_Yoh, US_Yoh$meta$Treatment == "Yohimbin", lab = "kmeans.25")
PlotBehaviorFlow_Delta(US_Yoh, US_Yoh$meta$Treatment == "Yohimbin", lab = "kmeans.25", method = "up")
PlotBehaviorFlow_Delta(US_Yoh, US_Yoh$meta$Treatment == "Yohimbin", lab = "kmeans.25", method = "down")
US_Yoh <- TwoGroupAnalysis(US_Yoh, US_Yoh$meta$Treatment)
## [1] "no label specified, performing analysis for all"
#classical readouts
head(US_Yoh$Results[[1]]$Statistics$raw)
## name p FDR
## 2 classifications.Supported.count 6.183963e-06 0.001090976
## 36 bodycentre.corners.speed.moving 5.208191e-05 0.003886288
## 1 classifications.Supported.time 7.699250e-05 0.003886288
## 18 bodycentre.center.speed.moving 1.078355e-04 0.003886288
## 4 classifications.None.count 1.101429e-04 0.003886288
## 35 bodycentre.corners.raw.speed 1.575135e-04 0.004631430
#clusters stats
head(US_Yoh$Results[[1]]$Statistics$kmeans.25)
## name p FDR
## 1 X7.nframes 0.0001877460 0.03729122
## 2 X7.count 0.0003315361 0.03729122
## 6 X10.count 0.0008877689 0.05256664
## 16 X4.count 0.0010335411 0.05256664
## 18 X21.count 0.0011683538 0.05256664
## 28 X11.count 0.0018196153 0.06822353
#clusters Transition Stats
head(US_Yoh$Results[[1]]$TransitionStats$kmeans.25$transitions)
## from to p.value Saline Yohimbin FC FDR
## 356 22 18 0.0001299529 1.4 3.966667 0.3529412 0.2826007
## 289 1 11 0.0002114144 31.3 17.933333 1.7453532 0.2826007
## 62 10 1 0.0003685239 11.7 6.733333 1.7376238 0.3284074
## 326 11 7 0.0010857317 29.8 12.033333 2.4764543 0.6922640
## 430 2 8 0.0014258274 0.0 0.300000 0.0000000 0.6922640
## 5 7 8 0.0015536542 11.2 4.966667 2.2550336 0.6922640
US$Results <- append(US$Results, US_Yoh$Results)
saveRDS(US,"../data/USData_Results.rds")
PlotTransitionsStats(US_Yoh)